#In this module, the volumetric moisture content is used as the state, instead of the pressure head
import sys
sys.path.append("core_rl_mz2_65")
from Scheduler_agent import IrrigationScheduler
import numpy as np 
from tensorforce.agents import Agent 
from tensorforce.environments import Environment 
# Build the environment around IrrigationScheduler; each episode is capped at 30 timesteps.
# Alternative configuration kept for reference: max_episode_timesteps=14.
environment = Environment.create(
    environment=IrrigationScheduler,
    max_episode_timesteps=30,
)

# Build the PPO agent. Learning rate and batch size are tuning parameters and can be adjusted.
# A previous attempt with batch_size=64 and learning_rate=0.0001 did not work well.
agent = Agent.create(
    agent='ppo',
    environment=environment,
    batch_size=5,
    learning_rate=1e-5,
)

# Load the LAI factors used by the training loop.
LAI_factors = np.loadtxt("../../relevant_data/lai_rl_all.txt")

# Per-episode total rewards, appended to as training progresses.
score_history = []

# Total number of training episodes.
n_episodes = 500000

# Zero-based episode indices after which the agent and reward history are saved
# (i.e. after every 100000 completed episodes).
points_to_save_model = [completed - 1 for completed in range(100000, 500001, 100000)]
def set_rooting_depth(episodeNumber, phase_length=25000, depths=(0.50, 1.00)):
    """Return the rooting depth scheduled for a training episode.

    The schedule cycles through ``depths`` in order, holding each value for
    ``phase_length`` consecutive episodes. With the defaults, episodes
    0-24999 map to 0.50, episodes 25000-49999 map to 1.00, episodes
    50000-74999 map to 0.50 again, and so on. The cycle continues
    indefinitely, so episode numbers of 500000 and above also receive a
    depth instead of an implicit ``None``.

    Args:
        episodeNumber: Zero-based episode index; must be non-negative.
        phase_length: Number of consecutive episodes that share one depth.
        depths: Sequence of depth values to cycle through.

    Returns:
        The entry of ``depths`` scheduled for ``episodeNumber``.

    Raises:
        ValueError: If ``episodeNumber`` is negative or ``phase_length`` is
            not positive.
    """
    if phase_length <= 0:
        raise ValueError(f"phase_length must be positive, got {phase_length}")
    if episodeNumber < 0:
        raise ValueError(f"episodeNumber must be non-negative, got {episodeNumber}")
    # Index of the phase this episode falls in; int() keeps the result usable
    # as a sequence index even if a float episode number is passed.
    phase = int(episodeNumber // phase_length)
    return depths[phase % len(depths)]

def TrainAgent():
    """Train the module-level agent on the module-level environment.

    Runs ``n_episodes`` episodes. For each episode the environment is reset,
    told the episode index via ``environment.set_rooting_depth``, and then
    stepped until it reports a terminal state. Before every step the
    environment receives the next LAI factor from that episode's slice of
    ``LAI_factors``. The total reward of each episode is appended to
    ``score_history`` and printed together with the mean of the last 100
    scores.

    After each episode index listed in ``points_to_save_model`` the agent is
    saved under ``./trained_agents/`` and the reward history so far is
    written to ``./results/reward_trajectory_<k>.txt``, where ``k`` is the
    number of completed episodes divided by 100000.
    """
    import os  # local import: only needed to create the results directory

    # Rows of LAI_factors consumed per episode; matches the
    # max_episode_timesteps=30 used when the environment is created.
    steps_per_episode = 30

    for episode in range(n_episodes):
        states = environment.reset()
        # NOTE(review): the environment method receives the episode index,
        # not a depth value -- confirm this is what IrrigationScheduler expects.
        environment.set_rooting_depth(episode)
        first_row = episode * steps_per_episode
        episodic_lai = LAI_factors[first_row:first_row + steps_per_episode]
        # The scheduled depth is constant within an episode, so compute it once.
        rooting_depth = set_rooting_depth(episode)

        terminal = False
        score = 0
        step = 0
        while not terminal:
            environment.set_LAI_factor(episodic_lai[step])
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)
            score += reward
            # Overwrite the second-to-last state entry with the scheduled depth.
            # NOTE(review): presumably that slot holds the rooting depth; the
            # state returned by reset() is not overwritten -- verify intent.
            states[-2] = rooting_depth
            step += 1

        score_history.append(score)
        print(f"Episode number {episode+1} and the score is {score} and the average score is {np.mean(score_history[-100:])}")

        if episode in points_to_save_model:
            k = (episode + 1) // 100000
            print(f" ========== Saving agents after {episode+1} episodes ==========")
            agent_name = 'Scheduler_agent -' + str(k)
            agent.save("./trained_agents/", filename=agent_name)
            # np.savetxt does not create missing directories; make sure the
            # target exists so a long training run is not lost at save time.
            os.makedirs("./results/", exist_ok=True)
            np.savetxt("./results/reward_trajectory_" + str(k) + ".txt", np.array(score_history))

# Start training only when this file is executed as a script (not on import).
if __name__ == '__main__':
    TrainAgent()
